{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 11、PyTorch中如何进行向量微分、矩阵微分与计算雅克比行列式"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 雅可比矩阵用法"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.autograd.functional import jacobian"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(tensor([[-1.6274, -0.6187,  1.4563],\n",
       "         [-1.0267, -0.0240,  0.5119]]),\n",
       " tensor([5.0250, 3.0029]))"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def func(x):\n",
    "    return x.exp().sum(dim=1)\n",
    "\n",
    "\n",
    "x = torch.randn(2, 3)\n",
    "y = func(x)\n",
    "x, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[[0.1964, 0.5387, 4.2899],\n",
       "         [0.0000, 0.0000, 0.0000]],\n",
       "\n",
       "        [[0.0000, 0.0000, 0.0000],\n",
       "         [0.3582, 0.9763, 1.6685]]])"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# y 对于 x 的雅可比矩阵\n",
    "jacobian(func, x)\n",
    "# [[[0.1964, 0.5387, 4.2899], # y[0] 来自第一行的偏导\n",
    "# [0.0000, 0.0000, 0.0000]], # y[0] 来自第二行的偏导是0\n",
    "\n",
    "# [[0.0000, 0.0000, 0.0000], # y[1] 来自第一行的偏到是0\n",
    "# [0.3582, 0.9763, 1.6685]]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([1.2148, 0.5800, 0.1546])\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(tensor([-2.1449, -0.3914,  1.0567]), tensor([-0.9301,  0.1886,  1.2113]))"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from torch import Tensor\n",
    "\n",
    "\n",
    "aaa: Tensor = torch.randn(3)\n",
    "print(aaa)\n",
    "\n",
    "\n",
    "def func(x):\n",
    "    return x + aaa\n",
    "\n",
    "\n",
    "x: Tensor = torch.randn(3)\n",
    "y: Tensor = func(x)\n",
    "x, y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([[1., 0., 0.],\n",
       "        [0., 1., 0.],\n",
       "        [0., 0., 1.]])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jacobian(func, x)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 使用backward函数求梯度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([1., 1., 1.])\n"
     ]
    }
   ],
   "source": [
    "x = torch.randn(3, requires_grad=True)\n",
    "y = func(x)\n",
    "# y.backward() # 报错\n",
    "y.backward(torch.ones_like(y))  # 存在一个临时的张量，这里是 偏t再偏y\n",
    "print(x.grad)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 使用雅可比矩阵求梯度"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor([1., 1., 1.])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "jcb = jacobian(func, x)\n",
    "torch.ones_like(y) @ jcb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 矩阵到矩阵之间求导"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.4925,  0.6456, -0.7058],\n",
      "        [-0.8978,  0.1272, -0.1797]], requires_grad=True)\n",
      "tensor([[-0.2775,  0.6865],\n",
      "        [ 0.7276, -0.6722],\n",
      "        [ 1.6039, -1.2717]], requires_grad=True)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(tensor([[0.4090, 0.0554, 0.3322],\n",
       "         [0.4090, 0.0554, 0.3322]]),\n",
       " torch.Size([2, 3]),\n",
       " tensor([[-1.3903, -1.3903],\n",
       "         [ 0.7727,  0.7727],\n",
       "         [-0.8856, -0.8856]]),\n",
       " torch.Size([3, 2]))"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = torch.randn(2, 3, requires_grad=True)\n",
    "b = torch.randn(3, 2, requires_grad=True)\n",
    "c = a @ b\n",
    "print(a, b, sep=\"\\n\")\n",
    "# c.backward() # 报错，必须是标量\n",
    "c.backward(torch.ones_like(c))\n",
    "a.grad, a.grad.shape, b.grad, b.grad.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([0.4090, 0.0554, 0.3322])\n",
      "tensor([0.4090, 0.0554, 0.3322])\n"
     ]
    }
   ],
   "source": [
    "# 使用雅可比矩阵\n",
    "def funcb(a):\n",
    "    return a @ b\n",
    "\n",
    "\n",
    "# 第一行的梯度：\n",
    "jcb = jacobian(funcb, a[0, :])\n",
    "c = funcb(a[0, :])\n",
    "print(torch.ones_like(c) @ jcb)  # 实际值为 b 的每一行加起来\n",
    "# 第二行的梯度：\n",
    "c = funcb(a[1, :])\n",
    "jcb = jacobian(funcb, a[1, :])\n",
    "print(torch.ones_like(c) @ jcb)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([-1.3903,  0.7727, -0.8856])\n",
      "tensor([-1.3903,  0.7727, -0.8856])\n"
     ]
    }
   ],
   "source": [
    "# 使用雅可比矩阵\n",
    "def funca(b):\n",
    "    return a @ b\n",
    "\n",
    "\n",
    "# 第一行的梯度：\n",
    "jcb = jacobian(funca, b[:, 0])\n",
    "c = funcb(b[:, 0])\n",
    "print(torch.ones_like(c) @ jcb)  # 实际值为 a 的每一列加起来\n",
    "# 第二行的梯度：\n",
    "c = funcb(b[:, 1])\n",
    "jcb = jacobian(funca, b[:, 1])\n",
    "print(torch.ones_like(c) @ jcb)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "dl",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
